package cn.tonyandmoney.lib.webmagic.pages;

import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.selector.Selectable;

import java.util.List;

/**
 * WebMagic {@link PageProcessor} configured for the {@code my.oschina.net} domain.
 *
 * <p>For every processed page it:
 * <ol>
 *   <li>queues each link on the page that matches {@link #BLOG_LINK_REGEX} as a new
 *       target request, and</li>
 *   <li>stores three result fields, in this order: {@code title}, {@code content}
 *       and {@code tags}.</li>
 * </ol>
 *
 * Created by niantuo on 2019/2/14.
 */
public class OSChinaPageProcessor implements PageProcessor {

    /** Pattern of the links that are followed. */
    private static final String BLOG_LINK_REGEX = "https://my\\.oschina\\.net/flashsword/blog/\\d+";

    /** XPath for the title; it selects the {@code h1} node itself (there is no {@code text()} step). */
    private static final String TITLE_XPATH = "//div[@class='BlogEntity']/div[@class='BlogTitle']/h1";

    /** CSS selector for the content container. */
    private static final String CONTENT_CSS = "div.content";

    /** XPath for the text of every tag anchor. */
    private static final String TAGS_XPATH = "//div[@class='BlogTags']/a/text()";

    private Site site = Site.me().setDomain("my.oschina.net");

    /**
     * Queues matching links found on {@code page} and records the extracted fields.
     *
     * @param page the downloaded page handed in by the crawler
     */
    @Override
    public void process(Page page) {
        Selectable html = page.getHtml();

        // Follow-up requests: every link on the page that matches the blog URL pattern.
        List<String> blogLinks = html.links().regex(BLOG_LINK_REGEX).all();
        page.addTargetRequests(blogLinks);

        // Extracted fields; the insertion order (title, content, tags) is kept as-is.
        String title = html.xpath(TITLE_XPATH).toString();
        page.putField("title", title);

        String content = html.$(CONTENT_CSS).toString();
        page.putField("content", content);

        List<String> tags = html.xpath(TAGS_XPATH).all();
        page.putField("tags", tags);
    }

    /**
     * Returns the site configuration used by this processor.
     *
     * @return the {@link Site} created for the {@code my.oschina.net} domain
     */
    @Override
    public Site getSite() {
        return site;
    }
}
